/*
 * Copyright (C) by Argonne National Laboratory
 *     See COPYRIGHT in top-level directory
 */

#include "adio.h"
#include "adio_extern.h"
#ifdef AGGREGATION_PROFILE
#include "mpe.h"
#endif
#include <assert.h>

/*
#define DEBUG
#define DEBUG2
*/

/* Message tags used for the point-to-point traffic in
 * ADIOI_Exch_file_views. */
#define COUNT_EXCH          0   /* amount_and_extra_data_t header (cb_alltoall disabled path) */
#define BLOCK_LENS          1   /* flattened filetype blocklens array */
#define INDICES             2   /* flattened filetype indices array */
#define FPIND_DISP_OFF_SZ   3   /* not referenced in the visible part of this file */


/* Header that every process sends to each aggregator (as raw bytes)
 * before the flattened filetype arrays are shipped.
 *
 * NOTE: the six ADIO_Offset members must stay contiguous and in this
 * order.  ADIOI_Exch_file_views copies 6 * sizeof(ADIO_Offset) bytes
 * starting at fp_ind straight into a view_state starting at its fp_ind
 * member; this presumably relies on view_state declaring the same six
 * members in the same order -- verify against the view_state definition
 * before changing either layout. */
typedef struct {
    int count;                  /* number of entries in the sender's flattened filetype */
    ADIO_Offset fp_ind;         /* sender's fd->fp_ind */
    ADIO_Offset disp;           /* sender's fd->disp */
    ADIO_Offset byte_off;       /* the 'off' argument of the sender's call */
    ADIO_Offset sz;             /* memory type size * count of the sender's access */
    ADIO_Offset ext;            /* filetype extent as computed by the sender */
    ADIO_Offset type_sz;        /* filetype size as computed by the sender */
} amount_and_extra_data_t;

/* Debug helper: dump an ADIOI_Flatlist_node to stderr.
 *
 * Writes a header line with the entry count, followed by every
 * (index, blocklen) pair.  A line break is emitted after entries
 * 5, 10, 15, ...; all other entries are followed by a single space.
 * A NULL pointer is reported and otherwise ignored. */
void ADIOI_Print_flatlist_node(ADIOI_Flatlist_node * flatlist_node_p)
{
    int idx = 0;

    if (!flatlist_node_p) {
        fprintf(stderr, "print flatlist node of NULL ptr\n");
        return;
    }

    fprintf(stderr, "print flatlist node count = %d (idx,blocklen)\n",
            (int) flatlist_node_p->count);

    while (idx < flatlist_node_p->count) {
        long long offset = (long long) flatlist_node_p->indices[idx];
        long long length = (long long) flatlist_node_p->blocklens[idx];
        int end_of_row = (idx != 0) && (idx % 5 == 0);

        if (end_of_row) {
            fprintf(stderr, "%d=(%lld,%lld)\n", idx, offset, length);
        } else {
            fprintf(stderr, "%d=(%lld,%lld) ", idx, offset, length);
        }
        idx++;
    }

    fprintf(stderr, "\n");
}


/* ADIOI_Exch_file_views - Sends all the aggregators the file
 * views and file view states of the clients.  It fills in the
 * client_file_view_state_arr for the aggregators and the
 * my_mem_view_state for the client.  It also initializes the
 * agg_file_view_state for all clients, which is the view for each
 * aggregator of a client's filetype.
 *
 * Parameters:
 *   myrank        - not used by this function.
 *   nprocs        - number of ranks looped over on fd->comm.
 *   file_ptr_type - forwarded unchanged to ADIOI_init_view_state.
 *   fd            - open file; its hints (cb_alltoall, cb_nodes,
 *                   ranklist), is_agg, comm, filetype, fp_ind and disp
 *                   are read.
 *   count, datatype - describe the memory side of the access.
 *   off           - stored as byte_off in the file view states.
 *   my_mem_view_state_arr, agg_file_view_state_arr
 *                 - output, written at index ranklist[i] for every
 *                   aggregator i.
 *   client_file_view_state_arr
 *                 - output, only written on aggregators; entry i gets a
 *                   newly allocated flattened type received from rank i.
 *
 * The function returns nothing.  If MPI_Alltoall fails, a message is
 * printed, the temporary count arrays are released and the function
 * returns early. */
void ADIOI_Exch_file_views(int myrank, int nprocs, int file_ptr_type,
                           ADIO_File fd, int count,
                           MPI_Datatype datatype, ADIO_Offset off,
                           view_state * my_mem_view_state_arr,
                           view_state * agg_file_view_state_arr,
                           view_state * client_file_view_state_arr)
{
    /* Convert my own fileview to an ADIOI_Flattened type and a
     * disp. MPI_Alltoall the count of ADIOI_Flatlist nodes.
     * MPI_Isend/Irecv the block_lens, indices of ADIOI_Flatlist node
     * to/from each of the aggregators with the rest of the file view
     * state. */

    int i = -1, j = -1;
    amount_and_extra_data_t *send_count_arr = NULL;
    amount_and_extra_data_t *recv_count_arr = NULL;
    int send_req_arr_sz = 0;
    int recv_req_arr_sz = 0;
    MPI_Request *send_req_arr = NULL, *recv_req_arr = NULL;
    MPI_Status *statuses = NULL;
    ADIO_Offset disp_off_sz_ext_typesz[6];
    MPI_Aint lb, memtype_extent, filetype_extent;
    int ret = -1;

    /* parameters for datatypes */
    ADIOI_Flatlist_node *flat_mem_p = NULL, *flat_file_p = NULL;
    MPI_Count memtype_sz = -1;
    /* Must start out false: it is tested as a boolean further down, so a
     * -1 "unknown" sentinel would be treated as "contiguous". */
    int memtype_is_contig = 0;
    ADIO_Offset filetype_sz = -1;

#ifdef AGGREGATION_PROFILE
    MPE_Log_event(5014, 0, NULL);
#endif
    /* The memtype will be freed after the call.  The filetype will be
     * freed in the close and should have been flattened in the file
     * view. */
    MPI_Type_size_x(datatype, &memtype_sz);
    MPI_Type_get_extent(datatype, &lb, &memtype_extent);
    flat_mem_p = ADIOI_Flatten_and_find(datatype);
    if (memtype_sz == memtype_extent) {
        /* size == extent is taken to mean "contiguous": describe the
         * whole access as one block */
        memtype_is_contig = 1;
        flat_mem_p->blocklens[0] = memtype_sz * count;
    }

    MPI_Type_get_extent(fd->filetype, &lb, &filetype_extent);
    MPI_Type_size_x(fd->filetype, &filetype_sz);
    flat_file_p = ADIOI_Flatten_and_find(fd->filetype);
    if (filetype_extent == filetype_sz) {
        /* same treatment for a contiguous filetype; note memtype_sz is
         * still the size of a single datatype element here */
        flat_file_p->blocklens[0] = memtype_sz * count;
        filetype_extent = memtype_sz * count;
        filetype_sz = filetype_extent;
    }

    /* Slots: 0 fp_ind, 1 disp, 2 byte offset, 3 access size,
     * 4 filetype extent, 5 filetype size */
    disp_off_sz_ext_typesz[0] = fd->fp_ind;
    disp_off_sz_ext_typesz[1] = fd->disp;
    disp_off_sz_ext_typesz[2] = off;
    disp_off_sz_ext_typesz[3] = memtype_sz * count;
    disp_off_sz_ext_typesz[4] = (ADIO_Offset) filetype_extent;
    disp_off_sz_ext_typesz[5] = (ADIO_Offset) filetype_sz;

    if (fd->hints->cb_alltoall != ADIOI_HINT_DISABLE) {
        /* alltoall path: both arrays are indexed by rank */
        recv_count_arr = ADIOI_Calloc(nprocs, sizeof(amount_and_extra_data_t));
        send_count_arr = ADIOI_Calloc(nprocs, sizeof(amount_and_extra_data_t));
    } else {
        /* point-to-point path: send_count_arr is indexed by aggregator
         * number (position in ranklist), not by rank */
        send_count_arr = ADIOI_Calloc(fd->hints->cb_nodes, sizeof(amount_and_extra_data_t));

        /* only aggregators receive data */
        if (fd->is_agg) {
            recv_count_arr = ADIOI_Calloc(nprocs, sizeof(amount_and_extra_data_t));
            recv_req_arr = ADIOI_Malloc(nprocs * sizeof(MPI_Request));
            for (i = 0; i < nprocs; i++)
                MPI_Irecv(&recv_count_arr[i], sizeof(amount_and_extra_data_t),
                          MPI_BYTE, i, COUNT_EXCH, fd->comm, &recv_req_arr[i]);
        }

        /* only send data to aggregators */
        send_req_arr = ADIOI_Calloc(fd->hints->cb_nodes, sizeof(MPI_Request));
        for (i = 0; i < fd->hints->cb_nodes; i++) {
            send_count_arr[i].count = flat_file_p->count;
            send_count_arr[i].fp_ind = disp_off_sz_ext_typesz[0];
            send_count_arr[i].disp = disp_off_sz_ext_typesz[1];
            send_count_arr[i].byte_off = disp_off_sz_ext_typesz[2];
            send_count_arr[i].sz = disp_off_sz_ext_typesz[3];
            send_count_arr[i].ext = disp_off_sz_ext_typesz[4];
            send_count_arr[i].type_sz = disp_off_sz_ext_typesz[5];
            MPI_Isend(&send_count_arr[i], sizeof(amount_and_extra_data_t),
                      MPI_BYTE, fd->hints->ranklist[i], COUNT_EXCH, fd->comm, &send_req_arr[i]);
        }
    }


    /* Every client has to build mem and file view_states for each aggregator.
     * We initialize their values here.  and we also initialize
     * send_count_arr */

    if (memtype_is_contig) {
        /* if memory is contiguous, we now replace memtype_sz and
         * memtype_extent with the full access size */
        memtype_sz *= count;
        memtype_extent = memtype_sz;
    }

    for (i = 0; i < fd->hints->cb_nodes; i++) {
        int tmp_agg_idx = fd->hints->ranklist[i];
        memset(&(my_mem_view_state_arr[tmp_agg_idx]), 0, sizeof(view_state));
        my_mem_view_state_arr[tmp_agg_idx].sz = disp_off_sz_ext_typesz[3];
        my_mem_view_state_arr[tmp_agg_idx].ext = (ADIO_Offset) memtype_extent;
        my_mem_view_state_arr[tmp_agg_idx].type_sz = (ADIO_Offset) memtype_sz;
        my_mem_view_state_arr[tmp_agg_idx].flat_type_p = flat_mem_p;
        ADIOI_init_view_state(file_ptr_type, 1, &(my_mem_view_state_arr[tmp_agg_idx]), TEMP_OFF);
        ADIOI_init_view_state(file_ptr_type, 1, &(my_mem_view_state_arr[tmp_agg_idx]), REAL_OFF);

        memset(&(agg_file_view_state_arr[tmp_agg_idx]), 0, sizeof(view_state));
        agg_file_view_state_arr[tmp_agg_idx].fp_ind = disp_off_sz_ext_typesz[0];
        agg_file_view_state_arr[tmp_agg_idx].disp = disp_off_sz_ext_typesz[1];
        agg_file_view_state_arr[tmp_agg_idx].byte_off = disp_off_sz_ext_typesz[2];
        agg_file_view_state_arr[tmp_agg_idx].sz = disp_off_sz_ext_typesz[3];
        agg_file_view_state_arr[tmp_agg_idx].ext = disp_off_sz_ext_typesz[4];
        agg_file_view_state_arr[tmp_agg_idx].type_sz = disp_off_sz_ext_typesz[5];
        agg_file_view_state_arr[tmp_agg_idx].flat_type_p = flat_file_p;

        ADIOI_init_view_state(file_ptr_type, 1, &(agg_file_view_state_arr[tmp_agg_idx]), TEMP_OFF);
        ADIOI_init_view_state(file_ptr_type, 1, &(agg_file_view_state_arr[tmp_agg_idx]), REAL_OFF);

        if (fd->hints->cb_alltoall != ADIOI_HINT_DISABLE) {
            /* entries for non-aggregator ranks stay zeroed (count == 0) */
            send_count_arr[tmp_agg_idx].count = flat_file_p->count;
            send_count_arr[tmp_agg_idx].fp_ind = disp_off_sz_ext_typesz[0];
            send_count_arr[tmp_agg_idx].disp = disp_off_sz_ext_typesz[1];
            send_count_arr[tmp_agg_idx].byte_off = disp_off_sz_ext_typesz[2];
            send_count_arr[tmp_agg_idx].sz = disp_off_sz_ext_typesz[3];
            send_count_arr[tmp_agg_idx].ext = disp_off_sz_ext_typesz[4];
            send_count_arr[tmp_agg_idx].type_sz = disp_off_sz_ext_typesz[5];
        }
    }

#ifdef DEBUG2
    fprintf(stderr, "my own flattened memtype: ");
    ADIOI_Print_flatlist_node(flat_mem_p);
    fprintf(stderr, "my own flattened filetype: ");
    ADIOI_Print_flatlist_node(flat_file_p);
#endif

    if (fd->hints->cb_alltoall != ADIOI_HINT_DISABLE) {
        ret = MPI_Alltoall(send_count_arr, sizeof(amount_and_extra_data_t),
                           MPI_BYTE,
                           recv_count_arr, sizeof(amount_and_extra_data_t), MPI_BYTE, fd->comm);
        if (ret != MPI_SUCCESS) {
            fprintf(stderr, "ADIOI_Exchange_file_views: MPI_Alltoall failed " "with error %d", ret);
            /* release the temporaries instead of leaking them */
            if (send_count_arr != NULL)
                ADIOI_Free(send_count_arr);
            if (recv_count_arr != NULL)
                ADIOI_Free(recv_count_arr);
            return;
        }
    } else {
#ifdef MPI_STATUSES_IGNORE
        statuses = MPI_STATUSES_IGNORE;
#else
        statuses = (MPI_Status *) ADIOI_Malloc(1 + nprocs * sizeof(MPI_Status));
#endif
        if (fd->is_agg) {
            MPI_Waitall(nprocs, recv_req_arr, statuses);
            ADIOI_Free(recv_req_arr);
            /* reset so the final cleanup can tell whether the second
             * phase allocated a new request array */
            recv_req_arr = NULL;
        }
        MPI_Waitall(fd->hints->cb_nodes, send_req_arr, statuses);
#ifndef MPI_STATUSES_IGNORE
        ADIOI_Free(statuses);
#endif
        ADIOI_Free(send_req_arr);
        send_req_arr = NULL;
    }
#ifdef DEBUG2
    if (fd->hints->cb_alltoall != ADIOI_HINT_DISABLE) {
        fprintf(stderr, "send_count_arr:");
        for (i = 0; i < nprocs; i++) {
            fprintf(stderr, "[%d]=%d ", i, send_count_arr[i].count);
        }
        fprintf(stderr, "\n");
        fprintf(stderr, "recv_count_arr:");
        for (i = 0; i < nprocs; i++) {
            fprintf(stderr, "[%d]=%d ", i, recv_count_arr[i].count);
        }
        fprintf(stderr, "\n");
    } else {
        fprintf(stderr, "send_count_arr:");
        for (i = 0; i < fd->hints->cb_nodes; i++) {
            fprintf(stderr, "[%d]=%d ", i, send_count_arr[i].count);
        }
        fprintf(stderr, "\n");
        if (fd->is_agg) {
            fprintf(stderr, "recv_count_arr:");
            for (i = 0; i < nprocs; i++) {
                fprintf(stderr, "[%d]=%d ", i, recv_count_arr[i].count);
            }
            fprintf(stderr, "\n");
        }
    }
#endif

    if (fd->hints->cb_alltoall == ADIOI_HINT_DISABLE) {
        for (i = 0; i < fd->hints->cb_nodes; i++)
            if (send_count_arr[i].count > 0)
                send_req_arr_sz++;
    }
    /* Figure out how many counts to send/recv */
    for (i = 0; i < nprocs; i++) {
        if (fd->hints->cb_alltoall != ADIOI_HINT_DISABLE) {
            if (send_count_arr[i].count > 0)
                send_req_arr_sz++;
        }
        /* Only aggregators should recv */
        if (fd->is_agg) {
            if (recv_count_arr[i].count > 0) {
                if ((client_file_view_state_arr[i].flat_type_p =
                     (ADIOI_Flatlist_node *) ADIOI_Malloc(sizeof(ADIOI_Flatlist_node))) == NULL) {
                    fprintf(stderr, "ADIOI_Exchange_file_views: malloc " "flat_type_p failed\n");
                    /* continuing would dereference the NULL pointer just
                     * below, so stop here explicitly */
                    MPI_Abort(MPI_COMM_WORLD, 1);
                }
                client_file_view_state_arr[i].flat_type_p->count = recv_count_arr[i].count;
                client_file_view_state_arr[i].flat_type_p->indices =
                    (ADIO_Offset *) ADIOI_Calloc(recv_count_arr[i].count, sizeof(ADIO_Offset));
                client_file_view_state_arr[i].flat_type_p->blocklens =
                    (ADIO_Offset *) ADIOI_Calloc(recv_count_arr[i].count, sizeof(ADIO_Offset));

                /* Copy the extra data out of the stuff we Alltoall'd.
                 * This copies the six ADIO_Offset members fp_ind..type_sz
                 * in one go and so depends on the member layout. */
                memcpy(&client_file_view_state_arr[i].fp_ind,
                       &recv_count_arr[i].fp_ind, 6 * sizeof(ADIO_Offset));

                recv_req_arr_sz++;
            }
        }
    }

    /* Since ADIOI_Calloc may do other things we add the +1
     * to avoid a 0-size malloc */
    send_req_arr = (MPI_Request *) ADIOI_Calloc(2 * (send_req_arr_sz) + 1, sizeof(MPI_Request));

    /* Two requests per peer: one for the indices, one for the blocklens */
    j = 0;
    if (recv_req_arr_sz > 0) {
        assert(fd->is_agg);
        recv_req_arr = (MPI_Request *) ADIOI_Calloc(2 * (recv_req_arr_sz), sizeof(MPI_Request));
        for (i = 0; i < nprocs; i++) {
            if (recv_count_arr[i].count > 0) {
                MPI_Irecv(client_file_view_state_arr[i].flat_type_p->indices,
                          recv_count_arr[i].count, ADIO_OFFSET, i,
                          INDICES, fd->comm, &recv_req_arr[j]);
                j++;
                MPI_Irecv(client_file_view_state_arr[i].flat_type_p->blocklens,
                          recv_count_arr[i].count, ADIO_OFFSET, i,
                          BLOCK_LENS, fd->comm, &recv_req_arr[j]);
                j++;
            }
        }
    }

    if (fd->hints->cb_alltoall != ADIOI_HINT_DISABLE) {
        /* send_count_arr is indexed by destination rank */
        j = 0;
        for (i = 0; i < nprocs; i++) {
            if (send_count_arr[i].count > 0) {
                MPI_Isend(flat_file_p->indices,
                          send_count_arr[i].count, ADIO_OFFSET, i,
                          INDICES, fd->comm, &send_req_arr[j]);
                j++;
                MPI_Isend(flat_file_p->blocklens,
                          send_count_arr[i].count, ADIO_OFFSET, i,
                          BLOCK_LENS, fd->comm, &send_req_arr[j]);
                j++;
            }
        }
    } else {
        /* send_count_arr is indexed by aggregator number; translate to
         * a rank through ranklist */
        j = 0;
        for (i = 0; i < fd->hints->cb_nodes; i++) {
            if (send_count_arr[i].count > 0) {
                MPI_Isend(flat_file_p->indices,
                          send_count_arr[i].count, ADIO_OFFSET,
                          fd->hints->ranklist[i], INDICES, fd->comm, &send_req_arr[j]);
                j++;
                MPI_Isend(flat_file_p->blocklens,
                          send_count_arr[i].count, ADIO_OFFSET,
                          fd->hints->ranklist[i], BLOCK_LENS, fd->comm, &send_req_arr[j]);
                j++;
            }
        }
    }

    /* Since ADIOI_Malloc may do other things we add the +1
     * to avoid a 0-size malloc */
#ifdef MPI_STATUSES_IGNORE
    statuses = MPI_STATUSES_IGNORE;
#else
    statuses = (MPI_Status *)
        ADIOI_Malloc(1 + 2 * MPL_MAX(send_req_arr_sz, recv_req_arr_sz)
                     * sizeof(MPI_Status));
#endif

    if (send_req_arr_sz > 0) {
        MPI_Waitall(2 * send_req_arr_sz, send_req_arr, statuses);
    }
    if (recv_req_arr_sz > 0) {
        MPI_Waitall(2 * recv_req_arr_sz, recv_req_arr, statuses);
    }

    /* Release every temporary regardless of how many requests were
     * posted; the arrays exist even when the matching request count is
     * zero (e.g. recv_count_arr on a non-aggregator in alltoall mode).
     * Pointers that may legitimately be NULL are checked first. */
    if (send_count_arr != NULL)
        ADIOI_Free(send_count_arr);
    if (send_req_arr != NULL)
        ADIOI_Free(send_req_arr);
    if (recv_count_arr != NULL)
        ADIOI_Free(recv_count_arr);
    if (recv_req_arr != NULL)
        ADIOI_Free(recv_req_arr);
#ifndef MPI_STATUSES_IGNORE
    ADIOI_Free(statuses);
#endif

    if (fd->is_agg == 1) {
        ADIOI_init_view_state(file_ptr_type, nprocs, client_file_view_state_arr, TEMP_OFF);
        ADIOI_init_view_state(file_ptr_type, nprocs, client_file_view_state_arr, REAL_OFF);
    }
#ifdef DEBUG
    if (fd->is_agg == 1) {
        ADIOI_Flatlist_node *fr_node_p;
        for (i = 0; i < nprocs; i++) {
            fprintf(stderr, "client_file_view_state_arr[%d]=(fp_ind=%Ld,"
                    "disp=%Ld,byte_off=%Ld,sz=%Ld,ext=%Ld\n", i,
                    client_file_view_state_arr[i].fp_ind,
                    client_file_view_state_arr[i].disp,
                    client_file_view_state_arr[i].byte_off,
                    client_file_view_state_arr[i].sz, client_file_view_state_arr[i].ext);
        }

        fr_node_p = ADIOI_Flatten_and_find(fd->file_realm_types[fd->my_cb_nodes_index]);
        assert(fr_node_p != NULL);

        fprintf(stderr, "my file realm (idx=%d,st_off=%Ld) ",
                fd->my_cb_nodes_index, fd->file_realm_st_offs[fd->my_cb_nodes_index]);
        ADIOI_Print_flatlist_node(fr_node_p);
    }
#endif

#ifdef DEBUG2
    if (fd->is_agg == 1) {
        for (i = 0; i < nprocs; i++) {
            fprintf(stderr, "client_file_view_state_arr[%d]: ", i);
            ADIOI_Print_flatlist_node(client_file_view_state_arr[i].flat_type_p);
        }
    }
#endif
#ifdef AGGREGATION_PROFILE
    MPE_Log_event(5015, 0, NULL);
#endif
}
